/**********************************************************************/
/*
   Author: Karan Makkar
   Created: Oct  2023
   Updated: Aug 2025, by Youssef Assarssah
   Description: Summary Statistics by Report Won Prakerja
   Outputs: Table 3
*/
/**********************************************************************/

/*----------------------------------------------------*/
* Section: Setup
/*----------------------------------------------------*/

* Project file paths: loaded here only when this do-file is run on its own
* (i.e. the global master_run has not been set to "1" by a calling script).
  if "$master_run" !="1" include "./Do/SET_FILEPATHS.do"

* Session settings
  set more off
  clear all
  set matsize 11000

* Open a log whose file name starts with today's date formatted as YYYYMMDD
  capture log close
  global prefix: display %tdCYND td(`c(current_date)')
  log using "$KP_logs/${prefix}_report_applywin_balance.txt", text replace

* De-identified, cleaned and merged survey extracts.
*   Files under $KP_deid_sakernas (sak_*):
  global aug20 "$KP_deid_sakernas/Clean/sak_aug20_deid_clean_merged.dta"
  global feb21 "$KP_deid_sakernas/Clean/sak_feb21_deid_clean_merged.dta"
  global aug21 "$KP_deid_sakernas/Clean/sak_aug21_deid_clean_merged.dta"
*   Files under $KP_deid_susenas (sus_*):
  global mar20 "$KP_deid_susenas/Clean/sus_mar20_deid_clean_merged.dta"
  global sep20 "$KP_deid_susenas/Clean/sus_sep20_deid_clean_merged.dta"
  global mar21 "$KP_deid_susenas/Clean/sus_mar21_deid_clean_merged.dta"
  global sep21 "$KP_deid_susenas/Clean/sus_sep21_deid_clean_merged.dta"
  global mar22 "$KP_deid_susenas/Clean/sus_mar22_deid_clean_merged.dta"

* makestars is called further down to format the "Difference" columns;
* it is included after -clear all- so the program definition is not dropped.
  include "$KP_do/3 Functions/makestars.do"

/*----------------------------------------------------------------*/
* Build the analysis sample: SAK and SUS respondents who had already
* won (win_before_survey == 1) by the time of their survey round
/*----------------------------------------------------------------*/

  * Stack the survey extracts: start from aug20, then append the rest
  use "${aug20}", clear
  foreach wave in feb21 aug21 sep20 mar21 sep21 mar22 {
    append using "${`wave'}"
  }

  * In the SUS rounds the self-report comes from a round-specific variable
  replace report_selected = hh_pk_win if inlist(sus_round, 6, 7)
  replace report_selected = get_pk if inlist(sus_round, 5, 8)

  * Won-before-survey indicator.
  * Position k of the three lists below describes one survey round:
  * the round variable, its value, and the last batch considered for it.
  * For that round the indicator is ever_win_<last batch>, restricted to
  * applications in batches 2..<last batch>.
  gen win_before_survey = .
  local round_var  sak_round sak_round sak_round sus_round sus_round sus_round sus_round
  local round_val  5 6 7 5 6 7 8
  local last_batch 3 11 17 5 11 18 22
  forvalues k = 1/7 {
    local rv : word `k' of `round_var'
    local wv : word `k' of `round_val'
    local lb : word `k' of `last_batch'
    replace win_before_survey = ever_win_`lb' if `rv' == `wv' & inrange(batch, 2, `lb')
  }

  * Number of rows per person and survey round.
  * Computed before any rows are dropped below.
  bysort anon_id4 sus_round sak_round: gen numapp = _N

  * Drop non-randomized batches
  drop if inlist(batch, 1, 15)

  * Keep only rows flagged as having won before the survey
  keep if win_before_survey ==1

  * The "5 years ago" location variables are blanked out for SAK round 7
  replace city_sak_5 =. if sak_round ==7 // Making missing for now because Aug '21 has 96% missing.
  replace java_sak_5 =. if sak_round ==7 // Same story

  * Harmonised covariates: take the SAK source in SAK rounds 5-7 and then
  * the SUS source in SUS rounds 5-8. Position k of each list belongs together.
  local out_vars age_survey female_survey city_survey_5 java_survey_5 hh_size_survey
  local sak_vars age_sak    female        city_sak_5    java_sak_5    hh_size_sak
  local sus_vars age_sus    female        city_sus_5    java_sus_5    hh_size_sus
  forvalues k = 1/5 {
    local out      : word `k' of `out_vars'
    local from_sak : word `k' of `sak_vars'
    local from_sus : word `k' of `sus_vars'
    gen `out' = `from_sak' if inrange(sak_round, 5, 7)
    replace `out' = `from_sus' if inrange(sus_round, 5, 8)
  }

  * Free up the name -female- and rebuild it from -gender-.
  * NOTE(review): gender == 0 evaluates to 0 (not missing) when gender is
  * missing, so this -female- is never missing -- confirm that is intended.
  rename female female_sak
  gen female = gender ==0

  * One row per person and survey round.
  * NOTE(review): which duplicate survives depends on the sort order within
  * the by-group, which -bysort- does not fix -- confirm the variables used
  * below are constant within person and round.
  bysort anon_id4 sak_round sus_round: keep if _n == 1

  /*******************************
  Variables shown in the table (one pair of rows each, in this order)
  and the row labels written to the LaTeX file
  ********************************/
  global vars_sumstats = "age_survey female_survey city_survey_5 java_survey_5 school_years severe_disability hh_size_survey numapp hh_size_sak_5 hours_worked_precovid num_child use_internet"

  * Row labels, listed in the same order as $vars_sumstats
  label variable age_survey "Age"
  label variable female_survey "Female"
  label variable city_survey_5 "Urban (5 yrs ago)"
  label variable java_survey_5 "Java (5 yrs ago)"
  label variable school_years "Years of School"
  label variable severe_disability "Severe Disability"
  label variable hh_size_survey "HH Size"
  label variable numapp "Num Applications"
  * NOTE(review): the variable name suggests a household-size measure while
  * the label says children under 5 -- confirm which one is right.
  label variable hh_size_sak_5 "Num Children Under 5 in HH"
  label variable hours_worked_precovid "Weekly Hours Worked Pre-Covid"
  label variable num_child "Num Children Under 18 in HH"
  label variable use_internet "Uses Internet"

	* Keep a copy of each label in a local named <varname>_label
	foreach labvar of varlist $vars_sumstats {
		local `labvar'_label: variable label `labvar'
	}

		* Row counters for the table: the loop that follows advances both by 2
		* per variable, so `i' takes the odd rows (1, 3, ...) and `j' the even
		* rows (2, 4, ...).
		local i = -1
		local j = 0

		* Bookkeeping macros
		local format = "%12.3f"
		global outcome_names = `"   "'

		* `num' = number of table variables; outcome1 ... outcome`num' hold
		* their names (read from the positional macros set by -tokenize-)
		tokenize `"$vars_sumstats"'
		local num : word count $vars_sumstats
		forvalues z = 1/`num' {
			local outcome`z' "``z''"
		}
		
		* Fill the table cells, two rows per variable:
		*   row `i' : label (c0), group means (c1, c2, c4, c5) and starred
		*             difference (c3, c6)
		*   row `j' : standard deviations and the standard error, in parentheses
		* Columns 1-3 use SAK rounds 5-7, columns 4-6 use SUS rounds 5-8.
		* Relies on `i' and `j' having been initialised before this loop.
		foreach var in $vars_sumstats {

			local i = `i' + 2
			local j = `j' + 2

			di "`var'"

			local r`i'_c0: variable label `var'

			di "`r`i'_c0'"

			su `var'

			* Skip variables with no observations or with a mean of exactly zero.
			* r(mean) and r(N) are evaluated as expressions on purpose: with zero
			* observations -summarize- does not return r(mean), so the macro form
			* `r(mean)' would expand to nothing and the -if- would be a syntax
			* error. As an expression, a missing r(mean) is simply "not zero" and
			* the r(N) test rejects the variable.
			if (r(mean) != 0) & (r(N) != 0) {

				foreach svy in sak sus {

					* Survey-specific settings: variables left blank for this
					* survey, the estimation sample, and the three target columns
					if "`svy'" == "sak" {
						local skipvars num_child use_internet
						local insample inrange(sak_round, 5, 7)
						local c_won  1
						local c_lost 2
						local c_diff 3
					}
					else {
						local skipvars hh_size_sak_5 hours_worked_precovid
						local insample inrange(sus_round, 5, 8)
						local c_won  4
						local c_lost 5
						local c_diff 6
					}

					local skip : list var in skipvars
					if !`skip' {

						* Mean and (SD) among those with report_selected == 1
						qui summ `var' if report_selected==1 & `insample'
						local r`i'_c`c_won' = string(r(mean),"%4.2f")
						local r`j'_c`c_won' = "(" + string(r(sd),"%4.2f") + ")"

						* Mean and (SD) among those with report_selected == 0
						qui summ `var' if report_selected==0 & `insample'
						local r`i'_c`c_lost' = string(r(mean),"%4.2f")
						local r`j'_c`c_lost' = "(" + string(r(sd),"%4.2f") + ")"

						* Difference: regression on report_selected, absorbing the
						* survey round, standard errors clustered on anon_id4
						reghdfe `var' report_selected if `insample', absorb(`svy'_round) vce(cluster anon_id4)

						* Read r(table) before calling makestars.
						* NOTE(review): the coefficient is multiplied by -1, so the
						* reported difference has the opposite sign to the coefficient
						* on report_selected -- confirm this is the intended direction.
						local beta = r(table)[1,1]*-1
						local pval = r(table)[4,1]
						local se = r(table)[2,1]

						local r`j'_c`c_diff' = "(" + string(`se',"%4.3fc") + ")"

						* makestars is defined in makestars.do (not shown here); its
						* r(coeff) result is used as the cell text.
						makestars, pointest(`beta') pval(`pval') bdec(3)
						di "`r(coeff)'"
						local r`i'_c`c_diff' = r(coeff)
					}
				}
			}
		}

		* Row index of the final "Observations" row: two past the last
		* statistics row `j'. (`f_1' is the row after that.)
			local f=`j'+2
			local f_1=`j'+3

		* Log the number of variables in the balance table. `num' is the word
		* count of $vars_sumstats; the row counter `i' is 2*num-1 at this point
		* and is therefore not the number of variables.
		di "`num' : number of variables "

		* Observation counts = number of non-missing values of -female-.
		* Columns 1-3 use SAK rounds 5-7, columns 4-6 use SUS rounds 5-8;
		* within each survey: report_selected == 1, report_selected == 0, and
		* the whole survey sample regardless of report_selected ("1" = no filter).
		local col = 0
		foreach sample in "inrange(sak_round, 5, 7)" "inrange(sus_round, 5, 8)" {
			foreach subset in "report_selected==1" "report_selected==0" "1" {
				local ++col
				summ female if `subset' & `sample'
				local r`f'_c`col' = string(r(N), "%9.0fc")
				di "`r`f'_c`col''"
			}
		}

	// ** Write Table 3 as a LaTeX fragment (header, body, footer).
	// The file is opened once, written top to bottom and closed once, rather
	// than being reopened in append mode for every row. The bytes written are
	// the same as with one open/close per row.

	local tex_file = "$KP_output/tables/descriptive/tex/Table3.tex"

	tempname myfile
	file open `myfile' using "`tex_file'", write replace

	** table header
	file write `myfile' ///
		_n "\hline \hline" ///
		_n "& \multicolumn{3}{c}{Workforce Survey} & \multicolumn{3}{c}{Welfare Survey} \\" ///
		_n "\cmidrule(lr){2-4}\cmidrule(lr){5-7}" ///
		_n "Variable & Report Won & Report Didn't Win & Difference & Report Won & Report Didn't Win & Difference \\" ///
		_n " & (1) & (2) & (3) & (4) & (5) & (6) \\" ///
		_n "\midrule"

	** table body: two rows per variable (`num' variables). Cells that were
	** never filled expand to empty strings.

	di "`=`num'*2'"

	forval row = 1 / `=`num'*2' {
		* Extra vertical space after every even row, i.e. after each
		* variable's pair of rows
		local linespace = ""
		if mod(`row', 2) == 0  local linespace = "\addlinespace"

		file write `myfile' ///
			_n "`r`row'_c0' & `r`row'_c1' & `r`row'_c2' & `r`row'_c3' & `r`row'_c4' & `r`row'_c5' & `r`row'_c6' \\ `linespace'"
	}

	** table footer: observation counts (row `f') and the batch ranges
	file write `myfile' ///
		_n "\\ Observations & `r`f'_c1' & `r`f'_c2' & `r`f'_c3' & `r`f'_c4' & `r`f'_c5' & `r`f'_c6' \\" ///
		_n " Batches & 2-14, 16-17 & 2-14, 16-17 & 2-14, 16-17 & 2-14, 16-22 & 2-14, 16-22 & 2-14, 16-22 \\" ///
		_n "\hline"

	file close `myfile'